from pyspark import SparkContext, RDD
from pyspark.sql import SparkSession

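# getOrCreate() reuses an existing SparkSession/SparkContext if one is already
# running in this process, otherwise it starts a new one.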
ss = SparkSession.builder.getOrCreate()
sc = SparkContext.getOrCreate()

# Build a DataFrame from an RDD. createDataFrame() cannot infer a schema from
# bare ints, so each element is wrapped in a one-field tuple.
# rdd = sc.parallelize([(i,) for i in (1, 2, 3, 4, 5, 6, 7, 8)])
# assert isinstance(rdd, RDD)
# df2 = ss.createDataFrame(rdd, ["value"])
# df2.show()

# Read the CSV without a header, so Spark names the columns _c0, _c1, ...
df = ss.read.csv("file:///Users/sonto/Workspace/Rimi/P1902/spark_example/UserProfile.csv", sep=",")

# Register the DataFrame as a temporary SQL view and count rows per _c9 value.
df.createOrReplaceTempView("user")
ss.sql("SELECT _c9, COUNT(1) FROM user GROUP BY _c9").show()
# The same data explored through the DataFrame API. The CSV is read without
# inferSchema, so every column is a string and the arithmetic/comparison below
# relies on Spark's implicit casts.
# df.printSchema()
# y = df.select('_c5', df['_c8'] + 1)
# x = y.filter(df['_c8'] > 1)   # filters via the parent DataFrame's _c8 column
# print(type(x))
# x.show()
